Load packages
# rm(list = ls())
library(tictoc) # to monitor time
library(raster) # read raster files
library(rgdal) # Use GDAL functions
library(exactextractr) # Zonal statistics from raster
library(sf) # Working with shapefile layers
library(tidyverse) # data wrangling
library(plyr) # data wrangling
library(dplyr) # data wrangling
library(ggpmisc) # To place geom_text within plot
library(plotly) # Interactive visualization
library(patchwork) # This package for placing multiple plats
print("Packages loaded successfully!")
## [1] "Packages loaded successfully!"
Functions specific to this RMD
#########################################
# Creating function for exponential fit
myexpfit <- function(response_var, pred_var, mydata){
# print(paste0("Model = ", "log(",response_var, ")~", pred_var))
model.linear <- lm(paste0("log(",response_var, ")~", pred_var), data = mydata)
a1 <- exp(model.linear$coefficients[1])
b1 <- (model.linear$coefficients[2])
R2 <- summary(model.linear)$r.squared
myresults <- c(a1, b1, R2)
return(myresults)
}
#########################################
# My GGPLOT for yield map
my_yield_plot <- function(yield_df){
fieldtitle <- levels(yield_df$Fieldname)
# Legend labeling - Splitting the yield range into equal classes
minyld <- round(min(yield_df$Yield_Mg.ha),0) # minimum yield
maxyld <- round(max(yield_df$Yield_Mg.ha),0) # maximum yield
Nclasses <- 4 # Number of yield classes to split from the whole yield range
mybreaks <- round(seq(minyld, maxyld, len = (Nclasses+1)),0)
YE_colors <- c("blue","cyan","lightgreen","yellow","orange", "red")
yld_plot <- ggplot(yield_df, aes(x = Longitude, y = Latitude)) +
geom_point(aes(color = Yield_Mg.ha), size = 0.7) +
theme_bw() +
# facet_grid(Field~Year, scales = "free") +
theme(aspect.ratio = 1.0) +
# ggtitle("Actual yield (Mg/ha)")+
facet_wrap(~Fieldname, scales = "free") +
# theme(legend.position = c(0.85, 0.27))+
scale_colour_gradientn(colours = YE_colors,
name = "Yield (Mg/ha)",
limits=c(minyld,maxyld),
breaks = mybreaks,
labels = round(mybreaks, 0)) +
guides(colour = guide_colourbar(barheight = unit(1.8, "in")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face = "bold")) +
theme(legend.title = element_text(face = "bold", size=8, margin = margin(0,0,7,0)),
legend.text = element_text(vjust = 0.5),
strip.text = element_text(face = "bold", size = 10))
yld_plot
}
#########################################
my_pred_plot <- function(sub_dat){
fieldtitle <- levels(sub_dat$Fieldname)
# Legend labeling - Splitting the yield range into equal classes
minyld <- round(min(sub_dat$Pred_Yield),0) # minimum yield
maxyld <- round(max(sub_dat$Pred_Yield),0) # maximum yield
Nclasses <- 4 # Number of yield classes to split from the whole yield range
mybreaks <- round(seq(minyld, maxyld, len = (Nclasses+1)),0)
YE_colors <- c("blue","cyan","lightgreen","yellow","orange", "red")
pred_map <- ggplot(sub_dat, aes(x = Longitude, y = Latitude)) +
geom_point(aes(color = Pred_Yield), size = 0.7) +
theme_bw() +
# facet_grid(Field~Year, scales = "free") +
theme(aspect.ratio = 1.0) +
# ggtitle("Predicted yield (Mg/ha)")+
facet_wrap(~Fieldname, scales = "free") +
# theme(legend.position = c(0.85, 0.27))+
scale_colour_gradientn(colours = YE_colors,
name = "Yield (Mg/ha)",
limits=c(minyld,maxyld),
breaks = mybreaks,
labels = round(mybreaks, 0)) +
guides(colour = guide_colourbar(barheight = unit(1.8, "in")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face = "bold")) +
theme(legend.title = element_text(face = "bold", size=8, margin = margin(0,0,7,0)),
legend.text = element_text(vjust = 0.5),
strip.text = element_text(face = "bold", size = 10))
pred_map
}
#########################################
## Actual and predicted yield map
make_yield_maps <- function(sub_datf){
sel_dat4 <- sub_datf %>%
select(Fieldname, Latitude, Longitude, Yield_Mg.ha, Pred_Yield) %>%
gather("yield_type", "Value", 4:5) %>%
droplevels()
sel_dat4$yield_type <- recode_factor(factor(sel_dat4$yield_type), Pred_Yield = "Predicted yield", Yield_Mg.ha = "Actual yield")
sel_dat4$yield_type <- factor(sel_dat4$yield_type, levels = c("Actual yield", "Predicted yield"))
minyld <- round(min(sel_dat4$Value),0) # minimum yield
maxyld <- round(max(sel_dat4$Value),0) # maximum yield
Nclasses <- 4 # Number of yield classes to split from the whole yield range
mybreaks <- round(seq(minyld, maxyld, len = (Nclasses+1)),0)
YE_colors <- c("blue","cyan","lightgreen","yellow","orange", "red")
yld_plot <- ggplot(sel_dat4, aes(x = Longitude, y = Latitude)) +
geom_point(aes(color = Value), size = 0.7) +
theme_bw() +
# facet_grid(Field~Year, scales = "free") +
theme(aspect.ratio = 1.0) +
# ggtitle("Actual yield (Mg/ha)")+
facet_wrap(~yield_type, scales = "free") +
# theme(legend.position = c(0.85, 0.27))+
scale_colour_gradientn(colours = YE_colors,
name = "Yield (Mg/ha)",
limits=c(minyld,maxyld),
breaks = mybreaks,
labels = round(mybreaks, 0)) +
guides(colour = guide_colourbar(barheight = unit(1.8, "in")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face = "bold")) +
theme(legend.title = element_text(face = "bold", size=8, margin = margin(0,0,7,0)),
legend.text = element_text(vjust = 0.5),
strip.text = element_text(face = "bold", size = 10))
yld_plot
}
Load dataset
load("Quantix_deep_learning.RData", verbose = TRUE)
## Loading objects:
## dat
str(dat)
## 'data.frame': 1270534 obs. of 46 variables:
## $ Fieldname : Factor w/ 8 levels "DMD_GH1","PSF_111",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ FlightDate : Factor w/ 22 levels "2019/06/07","2019/06/09",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ DOY : int 158 158 158 158 158 158 158 158 158 158 ...
## $ Week : Factor w/ 14 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Latitude : num 43 43 43 43 43 ...
## $ Longitude : num -76.6 -76.6 -76.6 -76.6 -76.6 ...
## $ Yield : num 182 188 191 194 197 ...
## $ Yield_Mg.ha : num 11.4 11.8 12 12.2 12.4 ...
## $ Type : Factor w/ 2 levels "grain","silage": 1 1 1 1 1 1 1 1 1 1 ...
## $ N : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ Strip : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ rgn_red : num 18 18 18 18 18 18 18 18 18 18 ...
## $ rgn_green : num 16 15 15 15 15 15 15 15 15 16 ...
## $ rgn_nir : num 36 36 35 35 35 35 35 35 35 35 ...
## $ rgb_red : num 115 115 116 117 118 118 119 118 118 119 ...
## $ rgb_green : num 89 89 90 90 91 91 92 91 91 92 ...
## $ rgb_blue : num 67 67 68 68 69 69 70 69 69 70 ...
## $ NDVI : num 0.333 0.333 0.321 0.321 0.321 ...
## $ GNDVI : num 0.385 0.412 0.4 0.4 0.4 ...
## $ EVI2 : num 0.597 0.616 0.59 0.59 0.59 ...
## $ SR : num 2 2 1.94 1.94 1.94 ...
## $ EXG : num -4 -4 -4 -5 -5 -5 -5 -5 -5 -5 ...
## $ TGI : num 3.28 3.28 3.28 2.89 2.89 2.89 2.89 2.89 2.89 2.89 ...
## $ Mgmt_zone : Factor w/ 4 levels "1","2","3","4": 3 3 3 3 3 3 2 2 2 2 ...
## $ Climod_GDD : int 10 10 10 10 10 10 10 10 10 10 ...
## $ Climod_Temp_C : num 15.6 15.6 15.6 15.6 15.6 ...
## $ Climod_Precip_mm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ACIS_GDD : int 10 10 10 10 10 10 10 10 10 10 ...
## $ ACIS_Temp_C : num 15.6 15.6 15.6 15.6 15.6 ...
## $ ACIS_Precip_mm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NASA_PAR : num 159 159 159 159 159 ...
## $ NASA_Temp_C : num 17.3 17.3 17.3 17.3 17.3 ...
## $ NASA_SH : num 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.2 ...
## $ NASA_RH : num 59.7 59.7 59.7 59.7 59.7 ...
## $ NASA_Precip_mm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NASA_WS : num 0.56 0.56 0.56 0.56 0.56 0.56 0.56 0.56 0.56 0.56 ...
## $ NASA_SurfWet : num 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 ...
## $ NASA_RootWet : num 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 ...
## $ NASA_ProfWet : num 0.66 0.66 0.66 0.66 0.66 0.66 0.66 0.66 0.66 0.66 ...
## $ DEM : int 125 125 125 125 125 125 125 125 125 125 ...
## $ DEM_type_Flat_Area : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ DEM_type_Lower_Slope : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 2 2 ...
## $ DEM_type_Middle_Slope: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ DEM_type_Ridge : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 1 1 1 ...
## $ DEM_type_Upper_Slope : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ DEM_type_Valley : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
r2df_all <- data.frame()
Data preprocessing
Check for missing values
# Check if any columns have missing values
sapply(dat, anyNA)
## Fieldname FlightDate DOY
## FALSE FALSE FALSE
## Week Latitude Longitude
## FALSE FALSE FALSE
## Yield Yield_Mg.ha Type
## FALSE FALSE FALSE
## N Strip rgn_red
## TRUE TRUE FALSE
## rgn_green rgn_nir rgb_red
## FALSE FALSE FALSE
## rgb_green rgb_blue NDVI
## FALSE FALSE FALSE
## GNDVI EVI2 SR
## FALSE FALSE FALSE
## EXG TGI Mgmt_zone
## FALSE FALSE TRUE
## Climod_GDD Climod_Temp_C Climod_Precip_mm
## FALSE FALSE FALSE
## ACIS_GDD ACIS_Temp_C ACIS_Precip_mm
## FALSE FALSE FALSE
## NASA_PAR NASA_Temp_C NASA_SH
## FALSE FALSE FALSE
## NASA_RH NASA_Precip_mm NASA_WS
## FALSE FALSE FALSE
## NASA_SurfWet NASA_RootWet NASA_ProfWet
## FALSE FALSE FALSE
## DEM DEM_type_Flat_Area DEM_type_Lower_Slope
## FALSE FALSE FALSE
## DEM_type_Middle_Slope DEM_type_Ridge DEM_type_Upper_Slope
## FALSE FALSE FALSE
## DEM_type_Valley
## FALSE
# Names of columns that has missing values
names(dat)[sapply(dat, anyNA)]
## [1] "N" "Strip" "Mgmt_zone"
Plot all yield data
g_dat <- dat %>%
dplyr::filter(Type == "grain") %>%
droplevels()
my_yield_plot(g_dat)

s_dat <- dat %>%
dplyr::filter(Type == "silage") %>%
droplevels()
my_yield_plot(s_dat)

Exploratory data analysis
Distribution plots
# Violin plot
vplt <- ggplot(dat, aes(x = Fieldname, y = Yield_Mg.ha, fill = Fieldname)) +
geom_violin(alpha = 0.4) +
theme_bw() +
facet_wrap(~Type, nrow = 2, scales = "free_y")
vplt

dplt <- ggplot(dat, aes(x = Yield_Mg.ha, y = ..scaled.., fill = Fieldname)) +
geom_density(alpha = 0.4, size = 1) +
theme_bw() +
facet_wrap(~Type, nrow = 2, scales = "free")
dplt

GPS points vs yield
# "DMD_GH1" "PSF_111" "PSF_12" "SLS_ABH" "SLS_NS" "SSF_121" "SSF_202" "SSF_66"
# selfield <- "DMD_GH1"
sub_datf <- dat %>%
filter(Week == 1 & Type == "grain") %>%
droplevels()
# Legend labeling - Splitting the yield range into equal classes
minyld <- round(min(sub_datf$Yield_Mg.ha),0) # minimum yield
maxyld <- round(max(sub_datf$Yield_Mg.ha),0) # maximum yield
Nclasses <- 4 # Number of yield classes to split from the whole yield range
mybreaks <- round(seq(minyld, maxyld, len = (Nclasses+1)),0)
YE_colors <- c("blue","cyan","lightgreen","yellow","orange", "red")
#=====================
lat_plot <- ggplot(sub_datf, aes(x = Latitude, y = Yield_Mg.ha)) +
geom_point(aes(color = Yield_Mg.ha), size = 0.7) +
theme_bw() +
# facet_grid(Field~Year, scales = "free") +
theme(aspect.ratio = 1.0) +
ggtitle("Latitude vs Yield")+
facet_wrap(~Fieldname, scales = "free") +
# theme(legend.position = c(0.85, 0.27))+
scale_colour_gradientn(colours = YE_colors,
name = "Yield (Mg/ha)",
limits=c(minyld,maxyld),
breaks = mybreaks,
labels = round(mybreaks, 0)) +
guides(colour = guide_colourbar(barheight = unit(1.8, "in")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face = "bold")) +
theme(legend.title = element_text(face = "bold", size=8, margin = margin(0,0,7,0)),
legend.text = element_text(vjust = 0.5),
strip.text = element_text(face = "bold", size = 10))
lat_plot

#=====================
long_plot <- ggplot(sub_datf, aes(x = Longitude, y = Yield_Mg.ha)) +
geom_point(aes(color = Yield_Mg.ha), size = 0.7) +
theme_bw() +
# facet_grid(Field~Year, scales = "free") +
theme(aspect.ratio = 1.0) +
ggtitle("Longitude vs Yield")+
facet_wrap(~Fieldname, scales = "free") +
# theme(legend.position = c(0.85, 0.27))+
scale_colour_gradientn(colours = YE_colors,
name = "Yield (Mg/ha)",
limits=c(minyld,maxyld),
breaks = mybreaks,
labels = round(mybreaks, 0)) +
guides(colour = guide_colourbar(barheight = unit(1.8, "in")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face = "bold")) +
theme(legend.title = element_text(face = "bold", size=8, margin = margin(0,0,7,0)),
legend.text = element_text(vjust = 0.5),
strip.text = element_text(face = "bold", size = 10))
long_plot

Approach 1
1.a. Use only strip data for exponential model
# "DMD_GH1" "PSF_111" "PSF_12" "SLS_ABH" "SLS_NS" "SSF_121" "SSF_202" "SSF_66"
# "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
# Best grain field - DMD_GH1
# Best silage filed - SSF_121, PSF_12
selfield <- "DMD_GH1"
# seltype <- "silage"
sub_dat <- dat %>%
filter(!is.na(N) & Fieldname == selfield) %>%
droplevels()
my_yield_plot(sub_dat)

# "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
lvl <- levels(sub_dat$Week)
sub_dat$Strip <- as.numeric(sub_dat$Strip)
#==============
# Scatter plot between NDVI and Yield
sct_plt <- ggplot(sub_dat, aes(x = NDVI, y = Yield_Mg.ha)) +
geom_point(aes(color = Week)) +
theme_bw() +
theme(aspect.ratio = 1) +
ggtitle("NDVI vs Yield") +
xlab("NDVI") +
ylab("Yield (Mg/ha)") +
xlim(c(0.0, 1.0))
sct_plt

#==============
r2df <- data.frame()
for (i in 1:length(lvl)){
# print(i)
wsub_dat <- sub_dat %>%
filter(Week == lvl[i]) %>%
droplevels()
mod <- myexpfit("Yield_Mg.ha", "NDVI", wsub_dat)
## RMSE calculation ======
a_fit <- as.numeric(mod[1])
b_fit <- as.numeric(mod[2])
rsq <- as.numeric(mod[3])
ssub_dat <- dat %>%
filter(Week == lvl[i] & Fieldname == selfield) %>%
droplevels()
ssub_dat$Pred_Yield <- a_fit*exp(b_fit*ssub_dat$NDVI)
ssub_dat$error <- (ssub_dat$Pred_Yield - ssub_dat$Yield_Mg.ha)
sqe <- (ssub_dat$error^2)
mse <- mean(sqe)
rmse_week <- sqrt(mse)
# cat(lvl[i], ") a_fit = ", a_fit, "; b_fit = ", b_fit, ";\t R2 = ", rsq,";\t RMSE = ", rmse_week, "\n")
# cat("RMSE for", selfield, "on week", lvl[i], "is", rmse, "Mg/ha")
## ========================
r2 <- as.numeric(c(lvl[i], mod[1], mod[2], mod[3], rmse_week))
r2df <- rbind(r2df, r2)
# print(paste0(i, " = ", mod[3]))
}
colnames(r2df) <- c("Weeks", "a", "b", "R2", "RMSE")
str(r2df)
## 'data.frame': 13 obs. of 5 variables:
## $ Weeks: num 1 2 3 4 5 6 7 8 9 10 ...
## $ a : num 11.13 11.89 16.65 13.53 9.93 ...
## $ b : num 0.88 0.636 -0.359 0.216 0.761 ...
## $ R2 : num 0.03295 0.01578 0.01089 0.00963 0.41566 ...
## $ RMSE : num 2.29 2.25 2.14 2.14 1.72 ...
r2df$Fieldname <- rep(selfield, length(lvl))
r2df$Approach <- rep("1a", length(lvl))
r2df_all <- rbind(r2df_all, r2df)
#============================
## Pick the best week
best_week <- which.max(r2df$R2)
# best_week <- which.min(r2df$RMSE)
cat("The best week with max R2 is week ", best_week)
## The best week with max R2 is week 6
sub_dat1 <- dat %>%
filter(Week == best_week & Fieldname == selfield) %>%
droplevels()
a_coeff <- r2df[best_week, 2]
b_coeff <- r2df[best_week, 3]
sub_dat1$Pred_Yield <- a_coeff*exp(b_coeff*sub_dat1$NDVI)
make_yield_maps(sub_dat1)

# my_yield_plot(sub_dat1) + my_pred_plot(sub_dat1)
sub_dat1$error <- (sub_dat1$Pred_Yield - sub_dat1$Yield_Mg.ha)
mse <- mean(sub_dat1$error^2)
rmse <- sqrt(mse)
cat("RMSE for", selfield, "on week", best_week, "is", rmse, "Mg/ha")
## RMSE for DMD_GH1 on week 6 is 1.577851 Mg/ha
#======================
# Secondary axis plot
d <- r2df
x <- 'Weeks'
y1 <- 'R2'
y2 <- 'RMSE'
a <- range(d[[y1]])
b <- range(d[[y2]])
scale_factor <- diff(a)/diff(b)
d[[y2]] <- ((d[[y2]] - b[1]) * scale_factor) + a[1]
trans <- ~ ((. - a[1]) / scale_factor) + b[1]
r2_rmse_plt <- ggplot(d) +
geom_point(aes_string(x, y1)) +
geom_line(aes_string(x, y1)) +
geom_point(aes_string(x, y2), col='red') +
geom_line(aes_string(x, y2), col='red') +
ggtitle(selfield) +
theme_bw()+
theme(legend.position = c(0.8, 0.7)) +
scale_x_continuous(breaks = seq(1,15, 1))+
scale_y_continuous(sec.axis = sec_axis(trans=trans, name=y2))
r2_rmse_plt

#
# ggplotly(r2_rmse_plt)
1.b. Use lower and upper 10% of strip data
# "DMD_GH1" "PSF_111" "PSF_12" "SLS_ABH" "SLS_NS" "SSF_121" "SSF_202" "SSF_66"
selfield <- "DMD_GH1"
sub_dat <- dat %>%
filter(!is.na(N) & Fieldname == selfield) %>%
droplevels()
my_yield_plot(sub_dat)

# Evaluate qunatile cutoffs
pert <- 0.10
cutoffs <- quantile(sub_dat$Yield_Mg.ha, c(pert, (1-pert)))
cat("Cutoff for field", selfield, " =", cutoffs)
## Cutoff for field DMD_GH1 = 13.30046 16.16241
# Subset points within cutoff
qsub_dat <- sub_dat %>%
filter(Yield_Mg.ha <= cutoffs[[1]] | Yield_Mg.ha >= cutoffs[[2]]) %>%
droplevels()
my_yield_plot(qsub_dat)

#==============
# Scatter plot between NDVI and Yield
sct_plt <- ggplot(qsub_dat, aes(x = NDVI, y = Yield_Mg.ha)) +
geom_point(aes(color = Week)) +
theme_bw() +
theme(aspect.ratio = 1) +
xlab("NDVI") +
ylab("Yield (Mg/ha)") +
xlim(c(0.0, 1.0))
sct_plt

#==============
lvl <- levels(qsub_dat$Week)
r2df <- data.frame()
for (i in 1:length(lvl)){
# print(i)
wsub_dat <- qsub_dat %>%
filter(Week == lvl[i]) %>%
droplevels()
mod <- myexpfit("Yield_Mg.ha", "NDVI", wsub_dat)
## RMSE calculation ======
a_fit <- as.numeric(mod[1])
b_fit <- as.numeric(mod[2])
rsq <- as.numeric(mod[3])
ssub_dat <- dat %>%
filter(Week == lvl[i] & Fieldname == selfield) %>%
droplevels()
ssub_dat$Pred_Yield <- a_fit*exp(b_fit*ssub_dat$NDVI)
ssub_dat$error <- (ssub_dat$Pred_Yield - ssub_dat$Yield_Mg.ha)
sqe <- (ssub_dat$error^2)
mse <- mean(sqe)
rmse_week <- sqrt(mse)
# cat(lvl[i], ") a_fit = ", a_fit, "; b_fit = ", b_fit, ";\t R2 = ", rsq,";\t RMSE = ", rmse_week, "\n")
# cat("RMSE for", selfield, "on week", lvl[i], "is", rmse, "Mg/ha")
## ========================
r2 <- as.numeric(c(lvl[i], mod[1], mod[2], mod[3], rmse_week))
r2df <- rbind(r2df, r2)
# print(paste0(i, " = ", mod[3]))
}
colnames(r2df) <- c("Weeks", "a", "b", "R2", "RMSE")
str(r2df)
## 'data.frame': 13 obs. of 5 variables:
## $ Weeks: num 1 2 3 4 5 6 7 8 9 10 ...
## $ a : num 3.39 5.95 15.37 10.33 7.41 ...
## $ b : num 4.328 2.472 -0.233 0.77 1.289 ...
## $ R2 : num 0.19989 0.06955 0.00198 0.06774 0.70619 ...
## $ RMSE : num 5.43 2.54 2.04 1.97 1.86 ...
r2df$Fieldname <- rep(selfield, length(lvl))
r2df$Approach <- rep("1b", length(lvl))
r2df_all <- rbind(r2df_all, r2df)
#==========================================
## Select best week and evaluate RMSE
best_week <- which.max(r2df$R2)
cat("The best week with max R2 is week ", best_week)
## The best week with max R2 is week 6
sub_dat2 <- dat %>%
filter(Week == best_week & Fieldname == selfield)
a_coeff <- r2df[best_week, 2]
b_coeff <- r2df[best_week, 3]
sub_dat2$Pred_Yield <- a_coeff*exp(b_coeff*sub_dat2$NDVI)
make_yield_maps(sub_dat2)

# my_yield_plot(sub_dat2) + my_pred_plot(sub_dat2)
sub_dat2$error <- (sub_dat2$Pred_Yield - sub_dat2$Yield_Mg.ha)
se <- (sub_dat2$error^2)
mse <- mean(se)
rmse <- sqrt(mse)
cat("RMSE for", selfield, "on week", best_week, "is", rmse, "Mg/ha")
## RMSE for DMD_GH1 on week 6 is 1.824235 Mg/ha
#======================
# Secondary axis plot
d <- r2df
x <- 'Weeks'
y1 <- 'R2'
y2 <- 'RMSE'
a <- range(d[[y1]])
b <- range(d[[y2]])
scale_factor <- diff(a)/diff(b)
d[[y2]] <- ((d[[y2]] - b[1]) * scale_factor) + a[1]
trans <- ~ ((. - a[1]) / scale_factor) + b[1]
r2_rmse_plt <- ggplot(d) +
geom_point(aes_string(x, y1)) +
geom_line(aes_string(x, y1)) +
geom_point(aes_string(x, y2), col='red') +
geom_line(aes_string(x, y2), col='red') +
ggtitle(selfield) +
theme_bw()+
theme(legend.position = c(0.8, 0.7)) +
scale_x_continuous(breaks = seq(1,15, 1))+
scale_y_continuous(sec.axis = sec_axis(trans=trans, name=y2))
r2_rmse_plt

# ggplotly(r2_rmse_plt)
Approach 2
2.a. Use whole data without strip (select N == NA)
# "DMD_GH1" "PSF_111" "PSF_12" "SLS_ABH" "SLS_NS" "SSF_121" "SSF_202" "SSF_66"
selfield <- "DMD_GH1"
sub_dat <- dat %>%
filter(is.na(N) & Fieldname == selfield) %>%
droplevels()
my_yield_plot(sub_dat)

#==============
# Scatter plot between NDVI and Yield
sct_plt <- ggplot(sub_dat, aes(x = NDVI, y = Yield_Mg.ha)) +
geom_point(aes(color = Week)) +
theme_bw() +
theme(aspect.ratio = 1) +
xlab("NDVI") +
ylab("Yield (Mg/ha)") +
xlim(c(0.0, 1.0))
sct_plt

#==============
lvl <- levels(sub_dat$Week)
r2df <- data.frame()
for (i in 1:length(lvl)){
# print(i)
wsub_dat <- sub_dat %>%
filter(Week == lvl[i]) %>%
droplevels()
mod <- myexpfit("Yield_Mg.ha", "NDVI", wsub_dat)
## RMSE calculation ======
a_fit <- as.numeric(mod[1])
b_fit <- as.numeric(mod[2])
rsq <- as.numeric(mod[3])
ssub_dat <- dat %>%
filter(Week == lvl[i] & Fieldname == selfield) %>%
droplevels()
ssub_dat$Pred_Yield <- a_fit*exp(b_fit*ssub_dat$NDVI)
ssub_dat$error <- (ssub_dat$Pred_Yield - ssub_dat$Yield_Mg.ha)
sqe <- (ssub_dat$error^2)
mse <- mean(sqe)
rmse_week <- sqrt(mse)
# cat(lvl[i], ") a_fit = ", a_fit, "; b_fit = ", b_fit, ";\t R2 = ", rsq,";\t RMSE = ", rmse_week, "\n")
# cat("RMSE for", selfield, "on week", lvl[i], "is", rmse, "Mg/ha")
## ========================
r2 <- as.numeric(c(lvl[i], mod[1], mod[2], mod[3], rmse_week))
r2df <- rbind(r2df, r2)
# print(paste0(i, " = ", mod[3]))
}
colnames(r2df) <- c("Weeks", "a", "b", "R2", "RMSE")
str(r2df)
## 'data.frame': 13 obs. of 5 variables:
## $ Weeks: num 1 2 3 4 5 6 7 8 9 10 ...
## $ a : num 15.2 17.5 15.4 9.4 9.2 ...
## $ b : num -0.323 -0.717 -0.343 0.956 0.851 ...
## $ R2 : num 0.00406 0.01652 0.0069 0.07443 0.27984 ...
## $ RMSE : num 2.06 2.05 2.05 2 1.71 ...
r2df$Fieldname <- rep(selfield, length(lvl))
r2df$Approach <- rep("2a", length(lvl))
r2df_all <- rbind(r2df_all, r2df)
#==========================================
## Select best week and evaluate RMSE
best_week <- which.max(r2df$R2)
cat("The best week with max R2 is week ", best_week)
## The best week with max R2 is week 7
sub_dat3 <- dat %>%
filter(Week == best_week & Fieldname == selfield)
a_coeff <- r2df[best_week, 2]
b_coeff <- r2df[best_week, 3]
sub_dat3$Pred_Yield <- a_coeff*exp(b_coeff*sub_dat3$NDVI)
make_yield_maps(sub_dat3)

# my_yield_plot(sub_dat3) + my_pred_plot(sub_dat3)
sub_dat3$error <- (sub_dat3$Pred_Yield - sub_dat3$Yield_Mg.ha)
mse <- mean(sub_dat3$error^2)
rmse <- sqrt(mse)
cat("RMSE for", selfield, "on week", best_week, "is", rmse, "Mg/ha")
## RMSE for DMD_GH1 on week 7 is 1.568345 Mg/ha
#======================
# Secondary axis plot
d <- r2df
x <- 'Weeks'
y1 <- 'R2'
y2 <- 'RMSE'
a <- range(d[[y1]])
b <- range(d[[y2]])
scale_factor <- diff(a)/diff(b)
d[[y2]] <- ((d[[y2]] - b[1]) * scale_factor) + a[1]
trans <- ~ ((. - a[1]) / scale_factor) + b[1]
r2_rmse_plt <- ggplot(d) +
geom_point(aes_string(x, y1)) +
geom_line(aes_string(x, y1)) +
geom_point(aes_string(x, y2), col='red') +
geom_line(aes_string(x, y2), col='red') +
ggtitle(selfield) +
theme_bw()+
theme(legend.position = c(0.8, 0.7)) +
scale_x_continuous(breaks = seq(1,15, 1))+
scale_y_continuous(sec.axis = sec_axis(trans=trans, name=y2))
r2_rmse_plt

# ggplotly(r2_rmse_plt)
2.b. Use 10 and 90 percentile of strip data
# "DMD_GH1" "PSF_111" "PSF_12" "SLS_ABH" "SLS_NS" "SSF_121" "SSF_202" "SSF_66"
selfield <- "DMD_GH1"
sub_dat <- dat %>%
filter(is.na(N) & Fieldname == selfield) %>%
droplevels()
my_yield_plot(sub_dat)

# Evaluate quantile cutoffs
pert <- 0.10
cutoffs <- quantile(sub_dat$Yield_Mg.ha, c(pert, (1-pert)))
cat("Cutoff for field", selfield, " =", cutoffs)
## Cutoff for field DMD_GH1 = 10.73753 16.14555
# Subset points within cutoff
qsub_dat <- sub_dat %>%
filter(Yield_Mg.ha <= cutoffs[[1]] | Yield_Mg.ha >= cutoffs[[2]]) %>%
droplevels()
my_yield_plot(qsub_dat)

#==============
# Scatter plot between NDVI and Yield
sct_plt <- ggplot(qsub_dat, aes(x = NDVI, y = Yield_Mg.ha)) +
geom_point(aes(color = Week)) +
theme_bw() +
theme(aspect.ratio = 1) +
xlab("NDVI") +
ylab("Yield (Mg/ha)") +
xlim(c(0.0, 1.0))
sct_plt

#==============
lvl <- levels(qsub_dat$Week)
r2df <- data.frame()
for (i in 1:length(lvl)){
# print(i)
wsub_dat <- qsub_dat %>%
filter(Week == lvl[i]) %>%
droplevels()
mod <- myexpfit("Yield_Mg.ha", "NDVI", wsub_dat)
## RMSE calculation ======
a_fit <- as.numeric(mod[1])
b_fit <- as.numeric(mod[2])
rsq <- as.numeric(mod[3])
ssub_dat <- dat %>%
filter(Week == lvl[i] & Fieldname == selfield) %>%
droplevels()
ssub_dat$Pred_Yield <- a_fit*exp(b_fit*ssub_dat$NDVI)
ssub_dat$error <- (ssub_dat$Pred_Yield - ssub_dat$Yield_Mg.ha)
sqe <- (ssub_dat$error^2)
mse <- mean(sqe)
rmse_week <- sqrt(mse)
cat(lvl[i], ") a_fit = ", a_fit, "; b_fit = ", b_fit, ";\t R2 = ", rsq,";\t RMSE = ", rmse_week, "\n")
# cat("RMSE for", selfield, "on week", lvl[i], "is", rmse, "Mg/ha")
## ========================
r2 <- as.numeric(c(lvl[i], mod[1], mod[2], mod[3], rmse_week))
r2df <- rbind(r2df, r2)
# print(paste0(i, " = ", mod[3]))
}
## 1 ) a_fit = 15.22562 ; b_fit = -0.6268641 ; R2 = 0.004506561 ; RMSE = 2.572662
## 2 ) a_fit = 25.20251 ; b_fit = -2.013282 ; R2 = 0.03934784 ; RMSE = 2.501949
## 3 ) a_fit = 17.97113 ; b_fit = -1.004249 ; R2 = 0.01877287 ; RMSE = 2.493913
## 4 ) a_fit = 3.390362 ; b_fit = 3.314632 ; R2 = 0.2540942 ; RMSE = 3.395473
## 5 ) a_fit = 5.633953 ; b_fit = 1.754935 ; R2 = 0.548074 ; RMSE = 2.311081
## 6 ) a_fit = 5.398522 ; b_fit = 1.405825 ; R2 = 0.6169247 ; RMSE = 1.869549
## 7 ) a_fit = 4.295758 ; b_fit = 1.487639 ; R2 = 0.5689611 ; RMSE = 1.694422
## 8 ) a_fit = 2.240503 ; b_fit = 2.073151 ; R2 = 0.395068 ; RMSE = 1.761064
## 9 ) a_fit = 1.322938 ; b_fit = 2.585552 ; R2 = 0.2829638 ; RMSE = 1.958049
## 10 ) a_fit = 0.2277545 ; b_fit = 4.616331 ; R2 = 0.2371498 ; RMSE = 2.045412
## 12 ) a_fit = 2.476396 ; b_fit = 1.840895 ; R2 = 0.03169973 ; RMSE = 2.481548
## 13 ) a_fit = 0.005646806 ; b_fit = 8.835749 ; R2 = 0.4714134 ; RMSE = 1.758735
## 14 ) a_fit = 0.02793306 ; b_fit = 7.227301 ; R2 = 0.3882082 ; RMSE = 1.901906
colnames(r2df) <- c("Weeks", "a", "b", "R2", "RMSE")
str(r2df)
## 'data.frame': 13 obs. of 5 variables:
## $ Weeks: num 1 2 3 4 5 6 7 8 9 10 ...
## $ a : num 15.23 25.2 17.97 3.39 5.63 ...
## $ b : num -0.627 -2.013 -1.004 3.315 1.755 ...
## $ R2 : num 0.00451 0.03935 0.01877 0.25409 0.54807 ...
## $ RMSE : num 2.57 2.5 2.49 3.4 2.31 ...
r2df$Fieldname <- rep(selfield, length(lvl))
r2df$Approach <- rep("2b", length(lvl))
r2df_all <- rbind(r2df_all, r2df)
#==========================================
## Select best week and evaluate RMSE
best_week <- which.max(r2df$R2)
cat("The best week with max R2 is week ", best_week)
## The best week with max R2 is week 6
sub_dat4 <- dat %>%
filter(Week == best_week & Fieldname == selfield) %>%
droplevels()
a_coeff <- r2df[best_week, 2]
b_coeff <- r2df[best_week, 3]
sub_dat4$Pred_Yield <- a_coeff*exp(b_coeff*sub_dat4$NDVI)
make_yield_maps(sub_dat4)

# my_yield_plot(sub_dat4) + my_pred_plot(sub_dat4)
sub_dat4$error <- (sub_dat4$Pred_Yield - sub_dat4$Yield_Mg.ha)
mse <- mean(sub_dat4$error^2)
rmse <- sqrt(mse)
cat("RMSE for", selfield, "on week", best_week, "is", rmse, "Mg/ha")
## RMSE for DMD_GH1 on week 6 is 1.869549 Mg/ha
#======================
# Secondary axis plot
d <- r2df
x <- 'Weeks'
y1 <- 'R2'
y2 <- 'RMSE'
#-----------------------------------------------------------------------------
# Rescale the second y axis by
# - subtracting its minimum value (to set it to start at 0)
# - scaling so that it has the same range as the 'y1' variable
# - offsettting it by the minimum value of y1
#-----------------------------------------------------------------------------
a <- range(d[[y1]])
b <- range(d[[y2]])
scale_factor <- diff(a)/diff(b)
d[[y2]] <- ((d[[y2]] - b[1]) * scale_factor) + a[1]
#-----------------------------------------------------------------------------
# Need to define the second axis transformation to be the inverse of the data
# transformation to everything cancels out appropriately
#-----------------------------------------------------------------------------
trans <- ~ ((. - a[1]) / scale_factor) + b[1]
#-----------------------------------------------------------------------------
# tell the y axis to set up a scaled secondary axis with the given transform
#-----------------------------------------------------------------------------
r2_rmse_plt <- ggplot(d) +
geom_point(aes_string(x, y1)) +
geom_line(aes_string(x, y1)) +
geom_point(aes_string(x, y2), col='red') +
geom_line(aes_string(x, y2), col='red') +
ggtitle(selfield) +
theme_bw()+
theme(legend.position = c(0.8, 0.7)) +
scale_x_continuous(breaks = seq(1,15, 1))+
scale_y_continuous(sec.axis = sec_axis(trans=trans, name=y2))
r2_rmse_plt

# ggplotly(r2_rmse_plt)
Saving the combined metrics data into CSV
# write.csv(r2df_all, "Exponential_models_March29.csv", row.names = FALSE)
Load the saved CSV
r2_df <- read.csv("Exponential_models_March29.csv")
str(r2_df)
## 'data.frame': 436 obs. of 7 variables:
## $ Weeks : int 1 2 3 4 5 6 7 8 9 10 ...
## $ a : num 11.13 11.89 16.65 13.53 9.93 ...
## $ b : num 0.88 0.636 -0.359 0.216 0.761 ...
## $ R2 : num 0.03295 0.01578 0.01089 0.00963 0.41566 ...
## $ RMSE : num 2.29 2.25 2.14 2.14 1.72 ...
## $ Fieldname: chr "DMD_GH1" "DMD_GH1" "DMD_GH1" "DMD_GH1" ...
## $ Approach : chr "1a" "1a" "1a" "1a" ...
fcols <- c("Fieldname", "Approach")
r2_df[fcols] <- lapply(r2_df[fcols], factor)
Cumulative distribution plot per CropType
# Subset grain or silage
crop_dat <- dat %>%
filter(Type == "grain") %>%
droplevels()
g_cum_dist_plot <- ggplot(crop_dat, aes(Yield_Mg.ha)) +
stat_ecdf(aes(color = Fieldname), geom = "step") +
theme_bw()
g_cum_dist_plot

# Silage yield cumulative distribution
crop_dat <- dat %>%
filter(Type == "silage") %>%
droplevels()
s_cum_dist_plot <- ggplot(crop_dat, aes(Yield_Mg.ha)) +
stat_ecdf(aes(color = Fieldname), geom = "step") +
theme_bw()
s_cum_dist_plot

# #===========================
# hist_plot <- ggplot(crop_dat, aes(x = Yield_Mg.ha, y = ..scaled.., fill = Fieldname)) +
# # stat_ecdf(aes(color = Fieldname), geom = "step") +
# geom_density(aes(color = Fieldname), alpha = 0.2) +
# theme_bw()
#
# hist_plot